This is a cleaned version of 04_24_19 Temp link trait fish.Rmd/11_20_19_Temp_link_traits_fish.Rmd. Changes * improved formatting * easier to follow * species and region as random effects * scaling temperature values * setting nACQ to 0, see why here.

August 11, 2022: Working on Revisions from Global Ecology and Biogeography * Add random effects for order, genus, family, etc.

library(MuMIn)
library(lme4)
Loading required package: Matrix

Attaching package: ‘Matrix’

The following objects are masked from ‘package:tidyr’:

    expand, pack, unpack
library(data.table)
library(ggplot2)
library(here)
library(wesanderson) # to color plots
library(taxize) #to obtain order, genus, family
library(dplyr)
library(DHARMa)
Warning: package ‘DHARMa’ was built under R version 4.1.2
This is DHARMa 0.4.5. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
spp_master_ztemp_seus_buoy_scaled <- readRDS(here("Data","Spp_master", "spp_master_ztemp_seus_buoy_scaled.rds"))

Link up with spp key to add phylum, order, family, etc.

spp_key <- fread(here::here("Data","Spp_master","spp_key.csv"))

spp_master_ztemp_seus_buoy_scaled <- spp_master_ztemp_seus_buoy_scaled[spp_key, on = "spp"]

    

Running, and ranking arrival models without traits

##Here, I scaled all temperature values across all regions, and included species as a random effect. In order to do this, I had to change the algorithm slightly–nAGQ = 0.

Gain/Arrival Models with Additional random effects

#names of scaled columns
variables_scaled <- grep("*_scaled", names(spp_master_ztemp_seus_buoy_scaled), value = T)
#going to make loop to make all models I need to look at with a single temperature variable

arrival_model_comparison_taxonomy <- as.data.table(matrix(nrow = length(variables_scaled)))
arrival_model_comparison_taxonomy[, variable:=as.factor(V1)][, coef:=as.numeric(V1)][, p_value:=as.numeric(V1)][, AICc:=as.numeric(V1)]
arrival_model_comparison_taxonomy[, V1 := NULL]

for (i in 1:length(variables_scaled)){
  mod <- glmer(col ~ get(variables_scaled[i]) + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp),
               family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)
  arrival_model_comparison_taxonomy[i,variable := variables_scaled[i]]
  arrival_model_comparison_taxonomy[i,coef := coef(summary(mod))[,"Estimate"][2]]
  arrival_model_comparison_taxonomy[i,p_value := coef(summary(mod))[,"Pr(>|z|)"][2]]
  arrival_model_comparison_taxonomy[i,AICc := AICc(mod)]
  
  print(paste(i, length(variables_scaled), sep = "/"))
    
}

#by setting nACQ to 0, all models converged

saveRDS(object = arrival_model_comparison_taxonomy, file = here("Model_results","arrival_model_results_taxonomy.rds"))
fwrite(arrival_model_comparison_taxonomy, file = here("Model_results","arrival_model_results_taxonomy.csv"))
arrival_model_comparison_taxonomy <- readRDS(here::here("Model_results","arrival_model_results_taxonomy.rds"))

Model performance and assumptions.

#group by region
simulationOutput <- simulateResiduals(fittedModel = mod_extrarandom_gain)
simulationOutput.groupedbyreg <- recalculateResiduals(simulationOutput, group = spp_master_ztemp_seus_buoy_scaled.r$reg)
testDispersion(simulationOutput)

    DHARMa nonparametric dispersion test via sd of residuals fitted vs.
    simulated

data:  simulationOutput
dispersion = 0.48913, p-value = 0.008
alternative hypothesis: two.sided

testDispersion(simulationOutput.groupedbyreg)

    DHARMa nonparametric dispersion test via sd of residuals fitted vs.
    simulated

data:  simulationOutput
dispersion = 0.33711, p-value = 0.592
alternative hypothesis: two.sided

plot(simulationOutput.groupedbyreg)
DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details
qu = 0.25, log(sigma) = -2.803254 : outer Newton did not converge fully.
qu = 0.25, log(sigma) = -2.762825 : outer Newton did not converge fully.
qu = 0.25, log(sigma) = -2.928257 : outer Newton did not converge fully.
qu = 0.25, log(sigma) = -3.005512 : outer Newton did not converge fully.
qu = 0.25, log(sigma) = -3.082768 : outer Newton did not converge fully.
qu = 0.25, log(sigma) = -3.987401 : outer Newton did not converge fully.
qu = 0.25, log(sigma) = -4.089001 : outer Newton did not converge fully.
qu = 0.25, log(sigma) = -4.262087 : outer Newton did not converge fully.
qu = 0.25, log(sigma) = -4.28162 : outer Newton did not converge fully.
qu = 0.25, log(sigma) = -4.281743 : outer Newton did not converge fully.

#group by spp
simulationOutput.groupedbyspp <- recalculateResiduals(simulationOutput, group = spp_master_ztemp_seus_buoy_scaled.r$spp)
testDispersion(simulationOutput)

    DHARMa nonparametric dispersion test via sd of residuals fitted vs.
    simulated

data:  simulationOutput
dispersion = 0.48913, p-value = 0.008
alternative hypothesis: two.sided

testDispersion(simulationOutput.groupedbyspp)

    DHARMa nonparametric dispersion test via sd of residuals fitted vs.
    simulated

data:  simulationOutput
dispersion = 0.22649, p-value < 2.2e-16
alternative hypothesis: two.sided

plot(simulationOutput.groupedbyspp)
DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details

“The estimate for seasonality of -0.49 means that a 1 degree increase in seasonality is associated with a 0.49 decrease in the log-odds of a Gain occuring, compared to no Gain occuring. If we exponentiate this number then we obtain the odds ratio of 0.6108308, which means that for a 1 degree increase in Seasonality we expect to see (approximately) a 39% decrease in the odds of a Gain Occurring.” From this Stack Exchange post.

###Departure Models without Traits but with random effects for phylogeny

#group by region
simulationOutput_loss <- simulateResiduals(fittedModel = mod_extrarandom_loss)
simulationOutput.groupedbyreg_loss <- recalculateResiduals(simulationOutput_loss, group = spp_master_ztemp_seus_buoy_scaled.r$reg)
testDispersion(simulationOutput_loss)

    DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated

data:  simulationOutput
dispersion = 0.61949, p-value = 0.08
alternative hypothesis: two.sided

testDispersion(simulationOutput.groupedbyreg_loss)

    DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated

data:  simulationOutput
dispersion = 0.56908, p-value = 0.976
alternative hypothesis: two.sided

plot(simulationOutput.groupedbyreg_loss)
DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details

#no random effects
simulationOutput_loss_noRE <- simulateResiduals(fittedModel = mod_extrarandom_loss_noRE)
simulationOutput.groupedbyreg_loss_noRE <- recalculateResiduals(simulationOutput_loss_noRE, group = spp_master_ztemp_seus_buoy_scaled.r$reg)
testDispersion(simulationOutput_loss_noRE)

    DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated

data:  simulationOutput
dispersion = 1.0002, p-value = 0.936
alternative hypothesis: two.sided

testDispersion(simulationOutput.groupedbyreg_loss_noRE)

    DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated

data:  simulationOutput
dispersion = 27.227, p-value < 2.2e-16
alternative hypothesis: two.sided

plot(simulationOutput.groupedbyreg_loss_noRE)
DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details

#group by spp
simulationOutput.groupedbyspp_loss <- recalculateResiduals(simulationOutput_loss, group = spp_master_ztemp_seus_buoy_scaled.r$spp)
testDispersion(simulationOutput_loss)

    DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated

data:  simulationOutput
dispersion = 0.61949, p-value = 0.08
alternative hypothesis: two.sided

testDispersion(simulationOutput.groupedbyspp_loss)

    DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated

data:  simulationOutput
dispersion = 0.29456, p-value = 0.04
alternative hypothesis: two.sided

plot(simulationOutput.groupedbyspp_loss)
DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details

#with no random effects
simulationOutput.groupedbyspp_loss_noRE <- recalculateResiduals(simulationOutput_loss_noRE, group = spp_master_ztemp_seus_buoy_scaled.r$spp)
testDispersion(simulationOutput_loss_noRE)

    DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated

data:  simulationOutput
dispersion = 1.0002, p-value = 0.936
alternative hypothesis: two.sided

testDispersion(simulationOutput.groupedbyspp_loss_noRE)

    DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated

data:  simulationOutput
dispersion = 3.251, p-value < 2.2e-16
alternative hypothesis: two.sided

plot(simulationOutput.groupedbyspp_loss_noRE)
DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details

“The estimate for seasonality of -0.3242 means that a 1 degree increase in seasonality is associated with a 0.3242 decrease in the log-odds of a Loss occuring, compared to no Loss occuring. If we exponentiate this number then we obtain the odds ratio of 0.7231, which means that for a 1 degree increase in Seasonality we expect to see (approximately) a 18% decrease in the odds of a Loss Occurring.” From this Stack Exchange post.

##Relative Variable Importance

NB: I didn’t end up using mean as a predictor variable (patterns captured in min and max, less relevant to thermal niche)

arrival_model_comparison_nomean <- arrival_model_comparison_taxonomy[!grepl("mean",variable),]

departure_model_comparison_nomean <- departure_model_comparison_taxonomy[!grepl("mean",variable),]

How does the null model with only random effects perform?

#add null to tables
intercept_arrival_mod_scaled <- glmer(col ~ + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)

r.squaredGLMM(intercept_arrival_mod_scaled)

intercept_departure_mod_scaled <- glmer(now_ext ~ + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)

r.squaredGLMM(intercept_departure_mod_scaled)

#best model better than null? yes
AICc(intercept_arrival_mod_scaled)-AICc(mod_extrarandom_gain)
AICc(intercept_departure_mod_scaled)-AICc(mod_extrarandom_loss)

How do other loss models perform with AICc less than 0.05?


mod_extrarandom_loss_2 <- glmer(now_ext ~ seas_sst_temp_change_lag2_scaled  + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)
r.squaredGLMM(mod_extrarandom_loss_2)

mod_extrarandom_loss_3 <- glmer(now_ext ~ min_sst_temp_change_lag2_scaled  + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)
r.squaredGLMM(mod_extrarandom_loss_3)

Relative Variable Importance for Arrival Models

#add ∆AIC to tables
min_arrival_AICc <- min(arrival_model_comparison_nomean[, AICc])
arrival_model_comparison_nomean[,"deltaAICc" := (AICc - min_arrival_AICc)]

#add relative likelihood exp( -0.5 * ∆AIC score for that model)
arrival_model_comparison_nomean[,"rel_likelihood" := exp((-0.5 * deltaAICc))]
#sum relative likelihoods across all models
arrival_model_comparison_nomean.likelihoodsum <- sum(arrival_model_comparison_nomean[, rel_likelihood])
#Akaike weight for a model is this rel_likelihood divided by the sum of these values across all models
arrival_model_comparison_nomean[,"akaike_weight" := rel_likelihood/arrival_model_comparison_nomean.likelihoodsum]

#I want to look at relative importance FOR:

#bottom/surface (bottom = includes sbt, grepl("sbt", variable))
arrival_bottom_importance <- sum(arrival_model_comparison_nomean[grep("sbt", variable), ]$akaike_weight)
arrival_surface_importance <- sum(arrival_model_comparison_nomean[grep("sst", variable), ]$akaike_weight)

#lag/not lagged (grepl("lag", variable))
arrival_lag_importance <- sum(arrival_model_comparison_nomean[grep("lag", variable), ]$akaike_weight)
arrival_nolag_importance <- sum(arrival_model_comparison_nomean[!grep("lag", variable), ]$akaike_weight)

#absolute (grepl("abs", variable))
arrival_abs_importance <- sum(arrival_model_comparison_nomean[grep("abs", variable), ]$akaike_weight)

#raw (!grepl("change"))
arrival_raw_importance <- sum(arrival_model_comparison_nomean[!grep("change", variable), ]$akaike_weight)

#change (grepl("change", variable), (!grepl("abs")))
arrival_change_importance <- sum(arrival_model_comparison_nomean[grep("change", variable), ][!grep("abs", variable),]$akaike_weight)

#type of temp variable (max, min, seas)
arrival_max_temp_importance <- sum(arrival_model_comparison_nomean[grep("max", variable), ]$akaike_weight)
arrival_min_temp_importance <- sum(arrival_model_comparison_nomean[grep("min", variable), ]$akaike_weight)
arrival_seas_temp_importance <- sum(arrival_model_comparison_nomean[grep("seas", variable), ]$akaike_weight)

arrival_model_comparison_nomean_akaikeweights <- arrival_model_comparison_nomean

saveRDS(arrival_model_comparison_nomean_akaikeweights, file = here("Model_results","arrival_model_comparison_nomean_akaikeweights.rds"))
saveRDS(arrival_model_comparison_nomean_akaikeweights, file = here("tables","arrival_model_comparison_nomean_akaikeweights.rds"))
arrival_model_comparison_nomean_akaikeweights <- readRDS(file = here("Model_results","arrival_model_comparison_nomean_akaikeweights.rds"))

Relative Variable Importance for different Pairs of Transformation and Metric


#I want to look at relative importance FOR 
#models with raw and min
arrival_raw_min_importance <- sum(arrival_model_comparison_nomean[grep("min", variable), ][!grep("change", variable)]$akaike_weight)

#models with raw and max
arrival_raw_max_importance <- sum(arrival_model_comparison_nomean[grep("max", variable), ][!grep("change", variable)]$akaike_weight)

#models with raw and seas
arrival_raw_seas_importance <- sum(arrival_model_comparison_nomean[grep("seas", variable), ][!grep("change", variable)]$akaike_weight)


#models with change and min
arrival_change_min_importance <- sum(arrival_model_comparison_nomean[grep("min_s.t_temp_change", variable), ][!grep("abs", variable)]$akaike_weight)

#models with change and max
arrival_change_max_importance <- sum(arrival_model_comparison_nomean[grep("max_s.t_temp_change", variable), ][!grep("abs", variable)]$akaike_weight)

#models with change and seas
arrival_change_seas_importance <- sum(arrival_model_comparison_nomean[grep("seas_s.t_temp_change", variable), ][!grep("abs", variable)]$akaike_weight)

#models with abs change and min
arrival_abs_change_min_importance <- sum(arrival_model_comparison_nomean[grep("min_s.t_temp_change_abs", variable), ]$akaike_weight)

#models with abs change and max
arrival_abs_change_max_importance <- sum(arrival_model_comparison_nomean[grep("max_s.t_temp_change_abs", variable), ]$akaike_weight)

#models with abs change and seas
arrival_abs_change_seas_importance <- sum(arrival_model_comparison_nomean[grep("seas_s.t_temp_change_abs", variable), ]$akaike_weight)

arrival_transform_metric_comparison <- data.table(Transformation =
                                                c("raw", "raw", "raw", "change", "change", "change", "abs_change", "abs_change", "abs_change"), 
                                              Metric = 
                                                rep(c("min", "max", "seas"), 3), 
                                              RVI = 
                                                c(arrival_raw_min_importance, arrival_raw_max_importance, 
                                                  arrival_raw_seas_importance, arrival_change_min_importance, 
                                                  arrival_change_max_importance, arrival_change_seas_importance, 
                                                  arrival_abs_change_min_importance, arrival_abs_change_max_importance, 
                                                  arrival_abs_change_seas_importance))

Relative Variable Importance Departure Models

#add ∆AIC to tables
min_departure_AICc <- min(departure_model_comparison_nomean[, AICc])
departure_model_comparison_nomean[,"deltaAICc" := (AICc - min_departure_AICc)]

#add relative likelihood exp( -0.5 * ∆AIC score for that model)
departure_model_comparison_nomean[,"rel_likelihood" := exp((-0.5 * deltaAICc))]
#sum relative likelihoods across all models
departure_model_comparison_nomean.likelihoodsum <- sum(departure_model_comparison_nomean[, rel_likelihood])
#Akaike weight for a model is this rel_likelihood devided by the sum of these values across all models
departure_model_comparison_nomean[,"akaike_weight" := rel_likelihood/departure_model_comparison_nomean.likelihoodsum]

#I want to look at relative importance FOR 
#bottom/surface (bottom = includes sbt, grepl("sbt", variable))
departure_bottom_importance <- sum(departure_model_comparison_nomean[grep("sbt", variable), ]$akaike_weight)
departure_surface_importance <- sum(departure_model_comparison_nomean[grep("sst", variable), ]$akaike_weight)

#lag/not lagged (grepl("lag", variable))
departure_lag_importance <- sum(departure_model_comparison_nomean[grep("lag", variable), ]$akaike_weight)
departure_nolag_importance <- sum(departure_model_comparison_nomean[!grep("lag", variable), ]$akaike_weight)

#absolute (grepl("abs", variable))
departure_abs_importance <- sum(departure_model_comparison_nomean[grep("abs", variable), ]$akaike_weight)

#raw (!grepl("change"))
departure_raw_importance <- sum(departure_model_comparison_nomean[!grep("change", variable), ]$akaike_weight)

#change (grepl("change", variable), (!grepl("abs")))
departure_change_importance <- sum(departure_model_comparison_nomean[grep("change", variable), ][!grep("abs", variable),]$akaike_weight)

#type of temp variable (max, min, seas)
departure_max_temp_importance <- sum(departure_model_comparison_nomean[grep("max", variable), ]$akaike_weight)
departure_min_temp_importance <- sum(departure_model_comparison_nomean[grep("min", variable), ]$akaike_weight)
departure_seas_temp_importance <- sum(departure_model_comparison_nomean[grep("seas", variable), ]$akaike_weight)

departure_model_comparison_nomean_akaikeweights <- departure_model_comparison_nomean

saveRDS(departure_model_comparison_nomean_akaikeweights, file = here("Model_results","departure_model_comparison_nomean_akaikeweights.rds"))
saveRDS(departure_model_comparison_nomean_akaikeweights, file = here("tables","departure_model_comparison_nomean_akaikeweights.rds"))
departure_model_comparison_nomean_akaikeweights <- readRDS(file = here("Model_results","departure_model_comparison_nomean_akaikeweights.rds"))

Relative Variable Importance for Pairs of Transformation and Metric Predicting departures


#I want to look at relative importance FOR 
#models with raw and min
departure_raw_min_importance <- sum(departure_model_comparison_nomean[grep("min", variable), ][!grep("change", variable)]$akaike_weight)

#models with raw and max
departure_raw_max_importance <- sum(departure_model_comparison_nomean[grep("max", variable), ][!grep("change", variable)]$akaike_weight)

#models with raw and seas
departure_raw_seas_importance <- sum(departure_model_comparison_nomean[grep("seas", variable), ][!grep("change", variable)]$akaike_weight)


#models with change and min
departure_change_min_importance <- sum(departure_model_comparison_nomean[grep("min_s.t_temp_change", variable), ][!grep("abs", variable)]$akaike_weight)

#models with change and max
departure_change_max_importance <- sum(departure_model_comparison_nomean[grep("max_s.t_temp_change", variable), ][!grep("abs", variable)]$akaike_weight)

#models with change and seas
departure_change_seas_importance <- sum(departure_model_comparison_nomean[grep("seas_s.t_temp_change", variable), ][!grep("abs", variable)]$akaike_weight)

#models with abs change and min
departure_abs_change_min_importance <- sum(departure_model_comparison_nomean[grep("min_s.t_temp_change_abs", variable), ]$akaike_weight)

#models with abs change and max
departure_abs_change_max_importance <- sum(departure_model_comparison_nomean[grep("max_s.t_temp_change_abs", variable), ]$akaike_weight)

#models with abs change and seas
departure_abs_change_seas_importance <- sum(departure_model_comparison_nomean[grep("seas_s.t_temp_change_abs", variable), ]$akaike_weight)

departure_transform_metric_comparison <- data.table(Transformation =
                                                c("raw", "raw", "raw", "change", "change", "change", "abs_change", "abs_change", "abs_change"), 
                                              Metric = 
                                                rep(c("min", "max", "seas"), 3), 
                                              RVI = 
                                                c(departure_raw_min_importance, departure_raw_max_importance, 
                                                  departure_raw_seas_importance, departure_change_min_importance, 
                                                  departure_change_max_importance, departure_change_seas_importance, 
                                                  departure_abs_change_min_importance, departure_abs_change_max_importance, 
                                                  departure_abs_change_seas_importance))

Let’s see how the RVI importance varies by looking at specific lag values


#first, table for lags
lags_RVI_scaled <- data.table(arrival_lag = c(0:9), arrival_RVI = 0, departure_RVI = 0)

#arrivals
lags_RVI_scaled[1,2] <- 1-sum(arrival_model_comparison_nomean[grep("lag", variable), ]$akaike_weight)
lags_RVI_scaled[2,2] <- sum(arrival_model_comparison_nomean[grep("lag1", variable), ]$akaike_weight)
lags_RVI_scaled[3,2] <- sum(arrival_model_comparison_nomean[grep("lag2", variable), ]$akaike_weight)
lags_RVI_scaled[4,2] <- sum(arrival_model_comparison_nomean[grep("lag3", variable), ]$akaike_weight)
lags_RVI_scaled[5,2] <- sum(arrival_model_comparison_nomean[grep("lag4", variable), ]$akaike_weight)
lags_RVI_scaled[6,2] <- sum(arrival_model_comparison_nomean[grep("lag5", variable), ]$akaike_weight)
lags_RVI_scaled[7,2] <- sum(arrival_model_comparison_nomean[grep("lag6", variable), ]$akaike_weight)
lags_RVI_scaled[8,2] <- sum(arrival_model_comparison_nomean[grep("lag7", variable), ]$akaike_weight)
lags_RVI_scaled[9,2] <- sum(arrival_model_comparison_nomean[grep("lag8", variable), ]$akaike_weight)
lags_RVI_scaled[10,2] <- sum(arrival_model_comparison_nomean[grep("lag9", variable), ]$akaike_weight)

#departures
lags_RVI_scaled[1,3] <- 1-sum(departure_model_comparison_nomean[grep("lag", variable), ]$akaike_weight)
lags_RVI_scaled[2,3] <- sum(departure_model_comparison_nomean[grep("lag1", variable), ]$akaike_weight)
lags_RVI_scaled[3,3] <- sum(departure_model_comparison_nomean[grep("lag2", variable), ]$akaike_weight)
lags_RVI_scaled[4,3] <- sum(departure_model_comparison_nomean[grep("lag3", variable), ]$akaike_weight)
lags_RVI_scaled[5,3] <- sum(departure_model_comparison_nomean[grep("lag4", variable), ]$akaike_weight)
lags_RVI_scaled[6,3] <- sum(departure_model_comparison_nomean[grep("lag5", variable), ]$akaike_weight)
lags_RVI_scaled[7,3] <- sum(departure_model_comparison_nomean[grep("lag6", variable), ]$akaike_weight)
lags_RVI_scaled[8,3] <- sum(departure_model_comparison_nomean[grep("lag7", variable), ]$akaike_weight)
lags_RVI_scaled[9,3] <- sum(departure_model_comparison_nomean[grep("lag8", variable), ]$akaike_weight)
lags_RVI_scaled[10,3] <- sum(departure_model_comparison_nomean[grep("lag9", variable), ]$akaike_weight)

saveRDS(lags_RVI_scaled, here("Model_results","lags_RVI_scaled.rds"))

#visualize lags through time
ggplot(data = lags_RVI_scaled, aes(x = arrival_lag)) +
  geom_line(aes(y = arrival_RVI), ) +
  geom_line(aes(y = departure_RVI), linetype = "dashed") +
  labs(x = "Lag (years)", y = "Relative Variable Importance") +
  scale_color_discrete() + #how to make a legend
  theme_classic() +
  theme(text = element_text(size = 20)) +
  scale_x_continuous(breaks = c(0:9))

Table for RVI data for arrival

arrival_departure <- c("arrival","arrival","arrival","arrival","arrival", "arrival","arrival", "arrival", "arrival","arrival","arrival","arrival","arrival","arrival", "arrival","arrival", "arrival", "arrival","arrival")
type <- c("Depth", "Depth", "lag", "lag", "Transformation", "Transformation", "Transformation", "Metric", "Metric", "Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric")
variable <- c("Bottom", "Surface", "no_lag", "lagged", "|Change|", "Change", "Raw", "Min", "Max", "Seas", "Raw x Min", "Raw x Max", "Raw x Seas", "Change x Min", "Change x Max", "Change x Seas", "|Change| x Min", "|Change| x Max", "|Change| x Seas")


value <- c(arrival_bottom_importance, arrival_surface_importance, arrival_nolag_importance, arrival_lag_importance, arrival_abs_importance, arrival_change_importance, arrival_raw_importance, arrival_min_temp_importance, arrival_max_temp_importance, arrival_seas_temp_importance, arrival_raw_min_importance, arrival_raw_max_importance, arrival_raw_seas_importance, arrival_change_min_importance, arrival_change_max_importance, arrival_change_seas_importance, arrival_abs_change_min_importance, arrival_abs_change_max_importance, arrival_abs_change_seas_importance)

RVI_arrival <- data.table(arrival_departure, type, variable, value)

Table for RVI data for departure

arrival_departure <- c("departure","departure","departure","departure","departure", "departure","departure", "departure", "departure","departure","departure","departure","departure","departure", "departure","departure", "departure", "departure","departure")
type <- c("Depth", "Depth", "lag", "lag", "Transformation", "Transformation", "Transformation", "Metric", "Metric", "Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric")
variable <- c("Bottom", "Surface", "no_lag", "lagged", "|Change|", "Change", "Raw", "Min", "Max", "Seas", "Raw x Min", "Raw x Max", "Raw x Seas", "Change x Min", "Change x Max", "Change x Seas", "|Change| x Min", "|Change| x Max", "|Change| x Seas")
value <- c(departure_bottom_importance, departure_surface_importance, departure_nolag_importance, departure_lag_importance, departure_abs_importance, departure_change_importance, departure_raw_importance, departure_min_temp_importance, departure_max_temp_importance, departure_seas_temp_importance, departure_raw_min_importance, departure_raw_max_importance, departure_raw_seas_importance, departure_change_min_importance, departure_change_max_importance, departure_change_seas_importance, departure_abs_change_min_importance, departure_abs_change_max_importance, departure_abs_change_seas_importance)

RVI_departure <- data.table(arrival_departure, type, variable, value)

Merge RVI tables for arrival and departure, and then graph

RVI_table <- as.data.table(rbind(RVI_arrival, RVI_departure)) #bind RVI for arrivals and departures

RVI_table.r <- RVI_table[!grep("Surface", variable)][!grep("lagged", variable)]

#change order of factor levels
RVI_table.r[, variable := factor(variable, levels = c("Bottom", "no_lag", "Raw", "Change", "|Change|", "Min", "Max", "Seas"))]

saveRDS(RVI_table, file = here("Model_results", "RVI_table.rds"))

ggplot(data = RVI_table.r, aes(x=variable, y = value, fill = arrival_departure)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_x_discrete(breaks = c("Bottom", "no_lag", "Raw", "Change", "|Change|", "Min", "Max", "Seas"), 
                   labels = c("Bottom Temp", "No Lag", "Raw Temp", "Change in \nTemp", "| Change in\n Temp |", "Min Temp", "Max Temp", "Seas")) +
  labs(x = "Temperature Variables", y = "Relative Variable Importance")  +
  theme_bw()

Another way to visualize is stacked bar plot.

#put levels into correct order for viewing, and add dummy level for legend
RVI_arrival[, variable := factor(variable, levels = c("Bottom", "Surface","  ", "Raw", "Change", "|Change|","", "no_lag", "lagged", "Min", "Max", "Seas"))]
RVI_arrival[, type := factor(type, levels = c("Depth", "Transformation", "Metric", "lag"))]

RVI_arrival_nolag <- RVI_arrival[type != "lag",]
RVI_arrival_nolag[, type := factor(type, levels = c("Depth", "Transformation", "Metric"))][, variable := factor(variable, levels = c("Bottom", "Surface","  ", "Raw", "Change", "|Change|","", "Min", "Max", "Seas"))]

saveRDS(RVI_arrival_nolag, here("Model_results", "RVI_arrival_nolag.rds"))

#and again for departure
RVI_departure[, variable := factor(variable, levels = c("Bottom", "Surface","  ", "Raw", "Change", "|Change|","", "no_lag", "lagged", "Min", "Max", "Seas"))]
RVI_departure[, type := factor(type, levels = c("Depth", "Transformation", "Metric", "lag"))]

RVI_departure_nolag <- RVI_departure[type != "lag",]
RVI_departure_nolag[, type := factor(type, levels = c("Depth", "Transformation", "Metric"))][, variable := factor(variable, levels = c("Bottom", "Surface","  ", "Raw", "Change", "|Change|","", "Min", "Max", "Seas"))]

saveRDS(RVI_departure_nolag, here("Model_results", "RVI_departure_nolag.rds"))


#plotting
pal <- wes_palette("GrandBudapest1", 8, type = "continuous")
colors <- c(pal[1], pal[2], "#FFFFFF", pal[3], pal[4], pal[5],"#FFFFFF", pal[6], pal[7], pal[8])

#arrivals
RVI_arrival_plot <- ggplot(data = RVI_arrival_nolag, aes(x=type, y = value, fill = variable)) +
  geom_bar(stat="identity", color = "black", size = 0, width = 0.8) +
  scale_x_discrete(breaks = c("Depth", "Transformation", "Metric"), labels = c("Depth", "Transformation", "Metric")) +
  labs(x = "Temperature Variables", y = "Relative Variable Importance") +
  theme_classic() +
  theme(text = element_text(size = 11.5), 
        legend.position = "right", 
        legend.title = element_blank(), 
        legend.spacing.x = unit(0.2, 'cm'),
        legend.justification = "center",
        aspect.ratio = (1),
        #, legend.position = "none"
        ) +
  guides(fill=guide_legend(ncol=1)) +
  scale_fill_manual(values = colors,
                    drop = F)

#departure
RVI_departure_plot <- ggplot(data = RVI_departure_nolag, aes(x=type, y = value, fill = variable)) +
  geom_bar(stat="identity", color = "black", size = 0, width = 0.8) +
  scale_x_discrete(breaks = c("Depth", "Transformation", "Metric"), labels = c("Depth", "Transformation", "Metric")) +
  labs(x = "Temperature Variables", y = "Relative Variable Importance") +
  theme_classic() +
  theme(text = element_text(size = 11.5), 
        legend.position = "right", 
        legend.title = element_blank(), 
        legend.spacing.x = unit(0.2, 'cm'),
        legend.justification = "center",
        aspect.ratio = (1),
        #, legend.position = "none"
        ) +
  guides(fill=guide_legend(ncol=1)) +
  scale_fill_manual(values = colors,
                    drop = F)

RVI_table
RVI_arrival_plot
RVI_departure_plot

Next step is to add the coefficients on to the bar chart. To do this, I will need to sum (aikake weight of model including variable x coefficient). I think this will be easy enough Using arrival_model_comparison_nomean data frame. I will add new column to data frame of aikaike weight x coefficient

#for some reason the coefficient is a factor
arrival_model_comparison_nomean[,coef_num := as.numeric(as.character(coef))]
arrival_model_comparison_nomean[,aw_coef := akaike_weight * coef_num]

#bottom temperature avg coefficient
arrival_bottom_temp_avg_coef <- sum(arrival_model_comparison_nomean[grep("sbt", variable)]$coef_num)

#surface temperature avg coefficient
arrival_surface_temp_avg_coef <- sum(arrival_model_comparison_nomean[grep("sst", variable)]$coef_num)

#raw seasonality avg coefficient
arrival_seas_raw_avg_coef <- 
  sum(arrival_model_comparison_nomean[grep("seas", variable), ][!grep("change", variable)]$coef_num)

#max abs change avg coefficient
arrival_max_abs_change_avg_coef <- 
  sum(arrival_model_comparison_nomean[grep("max_s.t_temp_change_abs", variable), ]$coef_num)
#for some reason the coefficient is a factor
departure_model_comparison_nomean[,coef_num := as.numeric(as.character(coef))]
departure_model_comparison_nomean[,aw_coef := akaike_weight * coef_num]

#bottom temperature avg coefficient
departure_bottom_temp_avg_coef <- sum(departure_model_comparison_nomean[grep("sbt", variable)]$coef_num)

#surface temperature avg coefficient
departure_surface_temp_avg_coef <- sum(departure_model_comparison_nomean[grep("sst", variable)]$coef_num)

#raw seasonality avg coefficient
departure_seas_raw_avg_coef <- 
  sum(departure_model_comparison_nomean[grep("seas", variable), ][!grep("change", variable)]$coef_num)

#min change avg coefficient
departure_min_abs_change_avg_coef <- 
  sum(departure_model_comparison_nomean[grep("min_s.t_temp_change", variable), ][!grep("abs", variable)]$coef_num)

#seas change avg coefficient
departure_seas_change_avg_coef <- 
  sum(departure_model_comparison_nomean[grep("seas_s.t_temp_change", variable), ][!grep("abs", variable)]$coef_num)




(These analyses were irrelevant and therefore not repeated for revisions) ##What if we run these models with fish only? No traits? How do inverts change the story? Because it looks like above, not a ton of invertebrates are going in and out.

#list of fish names from trait database
traits <- fread("TraitCollectionFishNAtlanticNEPacificContShelf (3).csv")

#make fish no fish key
fish <- data.table(spp = unique(traits$taxon), fish = "Y")

#combine with spp master
spp_master_ztemp_seus_buoy_scaled_fishonly <- merge(spp_master_ztemp_seus_buoy_scaled, fish, all.x = T)

#get rid of any rows that aren't observations of fish
spp_master_ztemp_seus_buoy_scaled_fishonly2 <- spp_master_ztemp_seus_buoy_scaled_fishonly[fish == "Y",]

Running models for fish only


#names of scaled columns
scaled_cols <- grep("*_scaled", names(spp_master_ztemp_seus_buoy_scaled_fishonly2), value = T)

#going to make loop to make all models I need to look at with single variables
variables_scaled <- colnames(spp_master_ztemp_seus_buoy_scaled_fishonly2[,scaled_cols, with = FALSE])

arrival_model_comparison_spprandom_scaled_seus_update_fishonly <- as.data.table(matrix(nrow = length(variables_scaled)))
arrival_model_comparison_spprandom_scaled_seus_update_fishonly[, variable:=as.factor(V1)][, coef:=as.numeric(V1)][, p_value:=as.numeric(V1)][, AICc:=as.numeric(V1)]
arrival_model_comparison_spprandom_scaled_seus_update_fishonly[, V1 := NULL]

for (i in 1:length(variables_scaled)){
  mod <- glmer(col ~ get(variables_scaled[i]) + (1|reg) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled_fishonly2, nAGQ = 0)
  arrival_model_comparison_spprandom_scaled_seus_update_fishonly[i,variable := variables_scaled[i]]
  arrival_model_comparison_spprandom_scaled_seus_update_fishonly[i,coef := coef(summary(mod))[,"Estimate"][2]]
  arrival_model_comparison_spprandom_scaled_seus_update_fishonly[i,p_value := coef(summary(mod))[,"Pr(>|z|)"][2]]
  arrival_model_comparison_spprandom_scaled_seus_update_fishonly[i,AICc := AICc(mod)]
  
  print(paste(i, length(variables_scaled), sep = "/"))
    
}

#by setting nACQ to 0, all models converged

departure Models without Traits

#see setup above

departure_model_comparison_spprandom_scaled_seus_update_fishonly <- as.data.table(matrix(nrow = length(variables_scaled)))
departure_model_comparison_spprandom_scaled_seus_update_fishonly[, variable:=as.factor(V1)][, coef:=as.numeric(V1)][, p_value:=as.numeric(V1)][, AICc:=as.numeric(V1)]
departure_model_comparison_spprandom_scaled_seus_update_fishonly[, V1 := NULL]

for (i in 1:length(variables_scaled)){
  mod <- glmer(now_ext ~ get(variables_scaled[i]) + (1|reg) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled_fishonly2, nAGQ = 0)
  departure_model_comparison_spprandom_scaled_seus_update_fishonly[i,variable := variables_scaled[i]]
  departure_model_comparison_spprandom_scaled_seus_update_fishonly[i,coef := coef(summary(mod))[,"Estimate"][2]]
  departure_model_comparison_spprandom_scaled_seus_update_fishonly[i,p_value := coef(summary(mod))[,"Pr(>|z|)"][2]]
  departure_model_comparison_spprandom_scaled_seus_update_fishonly[i,AICc := AICc(mod)]
  
  print(paste(i, length(variables_scaled), sep = "/"))
    
}

#by setting nACQ to 0, all models converged

Relative Variable Importance

arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean <- arrival_model_comparison_spprandom_scaled_seus_update_fishonly[!grepl("mean",variable),]

departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean <- departure_model_comparison_spprandom_scaled_seus_update_fishonly[!grepl("mean",variable),]

#comparison to null
col_mod_null <- glmer(col ~ (1|reg) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled_fishonly2, nAGQ = 0)
AICc(col_mod_null)-min(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean$AICc)

ext_mod_null <- glmer(ext ~ (1|reg) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled_fishonly2, nAGQ = 0)
AICc(ext_mod_null)-min(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean$AICc)

Relative Variable Importance for arrival Models

#add ∆AIC to tables
min_col_AICc_fishonly <- min(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[, AICc])
arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[,"deltaAICc" := (AICc - min_col_AICc_fishonly)]

#add relative likelihood exp( -0.5 * ∆AIC score for that model)
arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[,"rel_likelihood" := exp((-0.5 * deltaAICc))]
#sum relative likelihoods across all models
arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean.likelihoodsum <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[, rel_likelihood])
#Akaike weight for a model is this rel_likelihood devided by the sum of these values across all models
arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[,"akaike_weight" := rel_likelihood/arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean.likelihoodsum]

#I want to look at relative importance FOR 
#bottom/surface (bottom = includes sbt, grepl("sbt", variable))
col_bottom_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("sbt", variable), ]$akaike_weight)
col_surface_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("sst", variable), ]$akaike_weight)

#lag/not lagged (grepl("lag", variable))
col_lag_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag", variable), ]$akaike_weight)
col_nolag_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[!grep("lag", variable), ]$akaike_weight)

#absolute (grepl("abs", variable))
col_abs_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("abs", variable), ]$akaike_weight)

#raw (!grepl("change"))
col_raw_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[!grep("change", variable), ]$akaike_weight)

#change (grepl("change", variable), (!grepl("abs")))
col_change_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("change", variable), ][!grep("abs", variable),]$akaike_weight)

#type of temp variable (max, min, seas)
col_max_temp_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("max", variable), ]$akaike_weight)
col_min_temp_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("min", variable), ]$akaike_weight)
col_seas_temp_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("seas", variable), ]$akaike_weight)

Relative Variable Importance for departure Models

#add ∆AIC to tables
min_ext_AICc_fishonly <- min(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[, AICc])
departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[,"deltaAICc" := (AICc - min_ext_AICc_fishonly)]

#add relative likelihood exp( -0.5 * ∆AIC score for that model)
departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[,"rel_likelihood" := exp((-0.5 * deltaAICc))]
#sum relative likelihoods across all models
departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean.likelihoodsum <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[, rel_likelihood])
#Akaike weight for a model is this rel_likelihood devided by the sum of these values across all models
departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[,"akaike_weight" := rel_likelihood/departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean.likelihoodsum]

#I want to look at relative importance FOR 
#bottom/surface (bottom = includes sbt, grepl("sbt", variable))
ext_bottom_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("sbt", variable), ]$akaike_weight)
ext_surface_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("sst", variable), ]$akaike_weight)

#lag/not lagged (grepl("lag", variable))
ext_lag_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag", variable), ]$akaike_weight)
ext_nolag_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[!grep("lag", variable), ]$akaike_weight)

#absolute (grepl("abs", variable))
ext_abs_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("abs", variable), ]$akaike_weight)

#raw (!grepl("change"))
ext_raw_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[!grep("change", variable), ]$akaike_weight)

#change (grepl("change", variable), (!grepl("abs")))
ext_change_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("change", variable), ][!grep("abs", variable),]$akaike_weight)

#type of temp variable (max, min, seas)
ext_max_temp_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("max", variable), ]$akaike_weight)
ext_min_temp_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("min", variable), ]$akaike_weight)
ext_seas_temp_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("seas", variable), ]$akaike_weight)

Let’s see how the RVI importance varies by looking at specific lag values

#first, table for lags
lags_v_RVI_spprandom_scaled_fishonly <- data.table(col_lag = c(0:9), col_RVI = 0, departure_RVI = 0)
#arrival
lags_v_RVI_spprandom_scaled_fishonly[1,2] <- 1-sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[2,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag1", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[3,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag2", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[4,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag3", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[5,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag4", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[6,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag5", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[7,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag6", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[8,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag7", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[9,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag8", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[10,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag9", variable), ]$akaike_weight)
#departure
lags_v_RVI_spprandom_scaled_fishonly[1,3] <- 1-sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[2,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag1", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[3,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag2", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[4,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag3", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[5,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag4", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[6,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag5", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[7,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag6", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[8,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag7", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[9,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag8", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[10,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag9", variable), ]$akaike_weight)

ggplot(data = lags_v_RVI_spprandom_scaled_fishonly, aes(x = col_lag)) +
  geom_line(aes(y = col_RVI), col = "blue") +
  geom_line(aes(y = departure_RVI), col = "red") +
  labs(x = "Lag", y = "Relative Variable Importance") +
  scale_color_discrete() + #how to make a legend
  theme_bw()

Table for RVI data for arrival

col_ext <- c("col","col","col","col","col", "col","col", "col", "col","col")
type <- c("depth", "depth", "lag", "lag", "Change?", "Change?", "Change?", "Temp", "Temp", "Temp")
variable <- c("bottom", "surface", "no_lag", "lagged", "absolute_value_change", "change", "raw", "Min", "Max", "Seas")
value <- c(col_bottom_importance_fishonly, col_surface_importance_fishonly, col_nolag_importance_fishonly, col_lag_importance_fishonly, col_abs_importance_fishonly, col_change_importance_fishonly, col_raw_importance_fishonly, col_min_temp_importance_fishonly, col_max_temp_importance_fishonly, col_seas_temp_importance_fishonly)

RVI_col_fishonly <- data.table(col_ext, type, variable, value)

Table for RVI data for departure

col_ext <- c("ext","ext","ext","ext","ext", "ext", "ext", "ext", "ext", "ext")
type <- c("depth", "depth", "lag", "lag", "Change?", "Change?", "Change?", "Temp", "Temp", "Temp")
variable <- c("bottom", "surface", "no_lag", "lagged", "absolute_value_change", "change", "raw", "Min", "Max", "Seas")
value <- c(ext_bottom_importance_fishonly, ext_surface_importance_fishonly, ext_nolag_importance_fishonly, ext_lag_importance_fishonly, ext_abs_importance_fishonly, ext_change_importance_fishonly, ext_raw_importance_fishonly, ext_min_temp_importance_fishonly, ext_max_temp_importance_fishonly, ext_seas_temp_importance_fishonly)

RVI_ext_fishonly <- data.table(col_ext, type, variable, value)

Merge RVI tables for arrival and departure, and then graph

RVI_table_fishonly <- as.data.table(rbind(RVI_col_fishonly, RVI_ext_fishonly)) #bind RVI for arrivals and departures

RVI_table.r_fishonly <- RVI_table_fishonly[!grep("surface", variable)][!grep("lagged", variable)]

#change order of factor levels
RVI_table.r_fishonly[, variable := factor(variable, levels = c("bottom", "no_lag", "raw", "change", "absolute_value_change", "Min", "Max", "Seas"))]

save(RVI_table_fishonly, file = "RVI_table_fishonly.Rdata")

ggplot(data = RVI_table.r_fishonly, aes(x=variable, y = value, fill = col_ext)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_x_discrete(breaks = c("bottom", "no_lag", "raw", "change", "absolute_value_change", "Min", "Max", "Seas"), labels = c("Bottom Temp", "No Lag", "Raw Temp", "Change in \nTemp", "| Change in\n Temp |", "Min Temp", "Max Temp", "Seas")) +
  labs(x = "Temperature Variables", y = "Relative Variable Importance")  +
  theme_bw()

Another way to visualize is stacked bar plot.

#put levels into correct order for viewing
RVI_table_fishonly[, variable := factor(variable, levels = c("bottom", "surface", "absolute_value_change", "change", "raw", "no_lag", "lagged", "Min", "Max", "Seas"))]
#make types factors as well
RVI_table_fishonly[,type := as.factor(type)]

#arrival
RVI_col_plot_fishonly <- ggplot(data = RVI_table_fishonly[col_ext == "col",], aes(x=type, y = value, fill = variable)) +
  geom_bar(stat="identity", color = "black") +
  scale_x_discrete(breaks = c("Change?", "depth", "lag", "Temp"), labels = c("Change in\ntemp?", "Metric\nDepth", "Lagged?", "Temp")) +
  labs(x = "Temperature Variables", y = "Relative Variable Importance") +
  theme_classic() +
  theme(text = element_text(size = 22)
        #, legend.position = "none"
        )

#departure
RVI_ext_plot_fishonly <- ggplot(data = RVI_table_fishonly[col_ext == "ext",], aes(x=type, y = value, fill = variable)) +
  geom_bar(stat="identity", color = "black") +
  scale_x_discrete(breaks = c("Change?", "depth", "lag", "Temp"), labels = c("Change in\ntemp?", "Metric\nDepth", "Lagged?", "Temp")) +
  labs(x = "Temperature Variables", y = "Relative Variable Importance") +
  theme_classic()  +
  theme(text = element_text(size = 22)
        #, legend.position = "none"
        )

RVI_table_fishonly
RVI_col_plot_fishonly
RVI_ext_plot_fishonly
---
title: "Arrival and Departure Final Models"
output: html_notebook
---

This is a cleaned version of 04_24_19 Temp link trait fish.Rmd/11_20_19_Temp_link_traits_fish.Rmd.
Changes
* improved formatting
* easier to follow
* species and region as random effects
* scaling temperature values
* setting nACQ to 0, see why  [here](https://stats.stackexchange.com/questions/77313/why-cant-i-match-glmmTMB-family-binomial-output-with-manual-implementation-of-g). 

August 11, 2022: Working on Revisions from Global Ecology and Biogeography
* Add random effects for order, genus, family, etc.

```{r setup}
library(MuMIn)
library(lme4)
library(data.table)
library(ggplot2)
library(here)
library(wesanderson) # to color plots
library(taxize) #to obtain order, genus, family
library(dplyr)
library(DHARMa)

spp_master_ztemp_seus_buoy_scaled <- readRDS(here("Data","Spp_master", "spp_master_ztemp_seus_buoy_scaled.rds"))
```

Link up with spp key to add phylum, order, family, etc.
```{r pull in spp, order, family, etc.}
spp_key <- fread(here::here("Data","Spp_master","spp_key.csv"))

spp_master_ztemp_seus_buoy_scaled <- spp_master_ztemp_seus_buoy_scaled[spp_key, on = "spp"]

    
```


Running, and ranking arrival models without traits

##Here, I scaled all temperature values across all regions, and included species as a random effect. In order to do this, I had to change the algorithm slightly--nAGQ = 0.

Gain/Arrival Models with Additional random effects

```{r arrival models without traits additional random effects for phylogeny }
#names of scaled columns
variables_scaled <- grep("*_scaled", names(spp_master_ztemp_seus_buoy_scaled), value = T)
#going to make loop to make all models I need to look at with a single temperature variable

arrival_model_comparison_taxonomy <- as.data.table(matrix(nrow = length(variables_scaled)))
arrival_model_comparison_taxonomy[, variable:=as.factor(V1)][, coef:=as.numeric(V1)][, p_value:=as.numeric(V1)][, AICc:=as.numeric(V1)]
arrival_model_comparison_taxonomy[, V1 := NULL]

for (i in 1:length(variables_scaled)){
  mod <- glmer(col ~ get(variables_scaled[i]) + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp),
               family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)
  arrival_model_comparison_taxonomy[i,variable := variables_scaled[i]]
  arrival_model_comparison_taxonomy[i,coef := coef(summary(mod))[,"Estimate"][2]]
  arrival_model_comparison_taxonomy[i,p_value := coef(summary(mod))[,"Pr(>|z|)"][2]]
  arrival_model_comparison_taxonomy[i,AICc := AICc(mod)]
  
  print(paste(i, length(variables_scaled), sep = "/"))
    
}

#by setting nACQ to 0, all models converged

saveRDS(object = arrival_model_comparison_taxonomy, file = here("Model_results","arrival_model_results_taxonomy.rds"))
fwrite(arrival_model_comparison_taxonomy, file = here("Model_results","arrival_model_results_taxonomy.csv"))
arrival_model_comparison_taxonomy <- readRDS(here::here("Model_results","arrival_model_results_taxonomy.rds"))

```

Model performance and assumptions.
```{r}
#let's take a look at best performing model, and check assumptions
mod_extrarandom_gain <- glmer(col ~ seas_sst_temp_scaled  + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)

#Performance
#R-Squared
r.squaredGLMM(mod_extrarandom_gain)

#theoretical variance:
  #the variance explained by the fixed effects: 0.04
  #a variance explained by the entire model, including both fixed and random effects: 0.44

#delta (observational) variance:
  #the variance explained by the fixed effects: 0.015
  #a variance explained by the entire model, including both fixed and random effects: 0.16

#coef =  -0.49
#how much the probability changes with a given change in temperature
spp_master_ztemp_seus_buoy_scaled.r <- spp_master_ztemp_seus_buoy_scaled[complete.cases(spp_master_ztemp_seus_buoy_scaled[ , .(seas_sst_temp_scaled, col, reg, phylum, class, order, family, genus, spp)]),]

#Model assumptions using DHARMa package
#Test for overdispersion
#grouping basically transforms 0/1 response (bernoulli) in a k/n response (true binomial)
#https://cran.r-project.org/web/packages/DHARMa/vignettes/DHARMa.html#binomial-data
#group by region
simulationOutput <- simulateResiduals(fittedModel = mod_extrarandom_gain)
simulationOutput.groupedbyreg <- recalculateResiduals(simulationOutput, group = spp_master_ztemp_seus_buoy_scaled.r$reg)
testDispersion(simulationOutput)
testDispersion(simulationOutput.groupedbyreg)
plot(simulationOutput.groupedbyreg)

#group by spp
simulationOutput.groupedbyspp <- recalculateResiduals(simulationOutput, group = spp_master_ztemp_seus_buoy_scaled.r$spp)
testDispersion(simulationOutput)
testDispersion(simulationOutput.groupedbyspp)
plot(simulationOutput.groupedbyspp)

```
"The estimate for seasonality of -0.49 means that a 1 degree increase in seasonality is associated with a 0.49 decrease in the log-odds of a Gain occuring, compared to no Gain occuring. If we exponentiate this number then we obtain the odds ratio of 0.6108308, which means that for a 1 degree increase in Seasonality we expect to see (approximately) a 39% decrease in the odds of a Gain Occurring." From this [Stack Exchange post](https://stats.stackexchange.com/questions/444797/interpreting-a-generalised-linear-mixed-model-with-binomial-data). 

###Departure Models without Traits but with random effects for phylogeny
```{r departure models without traits with random effect phylogeny}

departure_model_comparison_taxonomy <- as.data.table(matrix(nrow = length(variables_scaled)))
departure_model_comparison_taxonomy[, variable:=as.factor(V1)][, coef:=as.numeric(V1)][, p_value:=as.numeric(V1)][, AICc:=as.numeric(V1)]
departure_model_comparison_taxonomy[, V1 := NULL]

for (i in 1:length(variables_scaled)){
  mod <- glmer(now_ext ~ get(variables_scaled[i]) + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)
  departure_model_comparison_taxonomy[i,variable := variables_scaled[i]]
  departure_model_comparison_taxonomy[i,coef := coef(summary(mod))[,"Estimate"][2]]
  departure_model_comparison_taxonomy[i,p_value := coef(summary(mod))[,"Pr(>|z|)"][2]]
  departure_model_comparison_taxonomy[i,AICc := AICc(mod)]
  
  print(paste(i, length(variables_scaled), sep = "/"))
    
}


saveRDS(object = departure_model_comparison_taxonomy, file = here("Model_results","departure_model_results_taxonomy.rds"))
fwrite(departure_model_comparison_taxonomy, file = here("Model_results","departure_model_results_taxonomy.csv"))

departure_model_comparison_taxonomy <- readRDS(here::here("Model_results","departure_model_results_taxonomy.rds"))

#let's take a look at best performing model, and check assumptions
mod_extrarandom_loss <- glmer(now_ext ~ seas_sst_temp_lag3_scaled  + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)

#let's look at best performing model without random effects
mod_extrarandom_loss_noRE <- glm(now_ext ~ seas_sst_temp_lag3_scaled, family = binomial, data = spp_master_ztemp_seus_buoy_scaled)

#R-Squared
r.squaredGLMM(mod_extrarandom_loss)

#theoretical variance:
  #the variance explained by the fixed effects: 0.018739262
  #a variance explained by the entire model, including both fixed and random effects: 0.4534810

#delta (observational) variance:
  #the variance explained by the fixed effects: 0.005761609
  #a variance explained by the entire model, including both fixed and random effects: 0.1394281

#coef =  -0.3242
#how much the probability changes with a given change in temperature

#Model assumptions using DHARMa package
#Test for overdispersion
#grouping basically transforms 0/1 response (bernoulli) in a k/n response (true binomial)
#https://cran.r-project.org/web/packages/DHARMa/vignettes/DHARMa.html#binomial-data
#group by region
simulationOutput_loss <- simulateResiduals(fittedModel = mod_extrarandom_loss)
simulationOutput.groupedbyreg_loss <- recalculateResiduals(simulationOutput_loss, group = spp_master_ztemp_seus_buoy_scaled.r$reg)
testDispersion(simulationOutput_loss)
testDispersion(simulationOutput.groupedbyreg_loss)
plot(simulationOutput.groupedbyreg_loss)

#no random effects
simulationOutput_loss_noRE <- simulateResiduals(fittedModel = mod_extrarandom_loss_noRE)
simulationOutput.groupedbyreg_loss_noRE <- recalculateResiduals(simulationOutput_loss_noRE, group = spp_master_ztemp_seus_buoy_scaled.r$reg)
testDispersion(simulationOutput_loss_noRE)
testDispersion(simulationOutput.groupedbyreg_loss_noRE)
plot(simulationOutput.groupedbyreg_loss_noRE)



#group by spp
simulationOutput.groupedbyspp_loss <- recalculateResiduals(simulationOutput_loss, group = spp_master_ztemp_seus_buoy_scaled.r$spp)
testDispersion(simulationOutput_loss)
testDispersion(simulationOutput.groupedbyspp_loss)
plot(simulationOutput.groupedbyspp_loss)

#with no random effects
simulationOutput.groupedbyspp_loss_noRE <- recalculateResiduals(simulationOutput_loss_noRE, group = spp_master_ztemp_seus_buoy_scaled.r$spp)
testDispersion(simulationOutput_loss_noRE)
testDispersion(simulationOutput.groupedbyspp_loss_noRE)
plot(simulationOutput.groupedbyspp_loss_noRE)

```
"The estimate for seasonality of -0.3242 means that a 1 degree increase in seasonality is associated with a 0.3242 decrease in the log-odds of a Loss occuring, compared to no Loss occuring. If we exponentiate this number then we obtain the odds ratio of 0.7231, which means that for a 1 degree increase in Seasonality we expect to see (approximately) a 18% decrease in the odds of a Loss Occurring." From this [Stack Exchange post](https://stats.stackexchange.com/questions/444797/interpreting-a-generalised-linear-mixed-model-with-binomial-data). 



##Relative Variable Importance

NB: I didn't end up using mean as a predictor variable (patterns captured in min and max, less relevant to thermal niche)
```{r get rid of any models using mean}
arrival_model_comparison_nomean <- arrival_model_comparison_taxonomy[!grepl("mean",variable),]

departure_model_comparison_nomean <- departure_model_comparison_taxonomy[!grepl("mean",variable),]


```

How does the null model with only random effects perform?
```{r null models}
#add null to tables
intercept_arrival_mod_scaled <- glmer(col ~ + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)

r.squaredGLMM(intercept_arrival_mod_scaled)

intercept_departure_mod_scaled <- glmer(now_ext ~ + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)

r.squaredGLMM(intercept_departure_mod_scaled)

#best model better than null? yes
AICc(intercept_arrival_mod_scaled)-AICc(mod_extrarandom_gain)
AICc(intercept_departure_mod_scaled)-AICc(mod_extrarandom_loss)


```
How do other loss models perform with AICc less than 0.05?
```{r}

mod_extrarandom_loss_2 <- glmer(now_ext ~ seas_sst_temp_change_lag2_scaled  + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)
r.squaredGLMM(mod_extrarandom_loss_2)

mod_extrarandom_loss_3 <- glmer(now_ext ~ min_sst_temp_change_lag2_scaled  + (1|reg) + (1|phylum) + (1|class) + (1|order) + (1|family) + (1|genus) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled, nAGQ = 0)
r.squaredGLMM(mod_extrarandom_loss_3)
```


Relative Variable Importance for Arrival Models
```{r RVI arrivals}
#add ∆AIC to tables
min_arrival_AICc <- min(arrival_model_comparison_nomean[, AICc])
arrival_model_comparison_nomean[,"deltaAICc" := (AICc - min_arrival_AICc)]

#add relative likelihood exp( -0.5 * ∆AIC score for that model)
arrival_model_comparison_nomean[,"rel_likelihood" := exp((-0.5 * deltaAICc))]
#sum relative likelihoods across all models
arrival_model_comparison_nomean.likelihoodsum <- sum(arrival_model_comparison_nomean[, rel_likelihood])
#Akaike weight for a model is this rel_likelihood divided by the sum of these values across all models
arrival_model_comparison_nomean[,"akaike_weight" := rel_likelihood/arrival_model_comparison_nomean.likelihoodsum]

#I want to look at relative importance FOR:

#bottom/surface (bottom = includes sbt, grepl("sbt", variable))
arrival_bottom_importance <- sum(arrival_model_comparison_nomean[grep("sbt", variable), ]$akaike_weight)
arrival_surface_importance <- sum(arrival_model_comparison_nomean[grep("sst", variable), ]$akaike_weight)

#lag/not lagged (grepl("lag", variable))
arrival_lag_importance <- sum(arrival_model_comparison_nomean[grep("lag", variable), ]$akaike_weight)
arrival_nolag_importance <- sum(arrival_model_comparison_nomean[!grep("lag", variable), ]$akaike_weight)

#absolute (grepl("abs", variable))
arrival_abs_importance <- sum(arrival_model_comparison_nomean[grep("abs", variable), ]$akaike_weight)

#raw (!grepl("change"))
arrival_raw_importance <- sum(arrival_model_comparison_nomean[!grep("change", variable), ]$akaike_weight)

#change (grepl("change", variable), (!grepl("abs")))
arrival_change_importance <- sum(arrival_model_comparison_nomean[grep("change", variable), ][!grep("abs", variable),]$akaike_weight)

#type of temp variable (max, min, seas)
arrival_max_temp_importance <- sum(arrival_model_comparison_nomean[grep("max", variable), ]$akaike_weight)
arrival_min_temp_importance <- sum(arrival_model_comparison_nomean[grep("min", variable), ]$akaike_weight)
arrival_seas_temp_importance <- sum(arrival_model_comparison_nomean[grep("seas", variable), ]$akaike_weight)

arrival_model_comparison_nomean_akaikeweights <- arrival_model_comparison_nomean

saveRDS(arrival_model_comparison_nomean_akaikeweights, file = here("Model_results","arrival_model_comparison_nomean_akaikeweights.rds"))
saveRDS(arrival_model_comparison_nomean_akaikeweights, file = here("tables","arrival_model_comparison_nomean_akaikeweights.rds"))
arrival_model_comparison_nomean_akaikeweights <- readRDS(file = here("Model_results","arrival_model_comparison_nomean_akaikeweights.rds"))

```

Relative Variable Importance for different Pairs of Transformation and Metric
```{r RVI arrival Pairs Transformation and Metric}

#I want to look at relative importance FOR 
#models with raw and min
arrival_raw_min_importance <- sum(arrival_model_comparison_nomean[grep("min", variable), ][!grep("change", variable)]$akaike_weight)

#models with raw and max
arrival_raw_max_importance <- sum(arrival_model_comparison_nomean[grep("max", variable), ][!grep("change", variable)]$akaike_weight)

#models with raw and seas
arrival_raw_seas_importance <- sum(arrival_model_comparison_nomean[grep("seas", variable), ][!grep("change", variable)]$akaike_weight)


#models with change and min
arrival_change_min_importance <- sum(arrival_model_comparison_nomean[grep("min_s.t_temp_change", variable), ][!grep("abs", variable)]$akaike_weight)

#models with change and max
arrival_change_max_importance <- sum(arrival_model_comparison_nomean[grep("max_s.t_temp_change", variable), ][!grep("abs", variable)]$akaike_weight)

#models with change and seas
arrival_change_seas_importance <- sum(arrival_model_comparison_nomean[grep("seas_s.t_temp_change", variable), ][!grep("abs", variable)]$akaike_weight)

#models with abs change and min
arrival_abs_change_min_importance <- sum(arrival_model_comparison_nomean[grep("min_s.t_temp_change_abs", variable), ]$akaike_weight)

#models with abs change and max
arrival_abs_change_max_importance <- sum(arrival_model_comparison_nomean[grep("max_s.t_temp_change_abs", variable), ]$akaike_weight)

#models with abs change and seas
arrival_abs_change_seas_importance <- sum(arrival_model_comparison_nomean[grep("seas_s.t_temp_change_abs", variable), ]$akaike_weight)

arrival_transform_metric_comparison <- data.table(Transformation =
                                                c("raw", "raw", "raw", "change", "change", "change", "abs_change", "abs_change", "abs_change"), 
                                              Metric = 
                                                rep(c("min", "max", "seas"), 3), 
                                              RVI = 
                                                c(arrival_raw_min_importance, arrival_raw_max_importance, 
                                                  arrival_raw_seas_importance, arrival_change_min_importance, 
                                                  arrival_change_max_importance, arrival_change_seas_importance, 
                                                  arrival_abs_change_min_importance, arrival_abs_change_max_importance, 
                                                  arrival_abs_change_seas_importance))


```


Relative Variable Importance Departure Models
```{r RVI departure}
#add ∆AIC to tables
min_departure_AICc <- min(departure_model_comparison_nomean[, AICc])
departure_model_comparison_nomean[,"deltaAICc" := (AICc - min_departure_AICc)]

#add relative likelihood exp( -0.5 * ∆AIC score for that model)
departure_model_comparison_nomean[,"rel_likelihood" := exp((-0.5 * deltaAICc))]
#sum relative likelihoods across all models
departure_model_comparison_nomean.likelihoodsum <- sum(departure_model_comparison_nomean[, rel_likelihood])
#Akaike weight for a model is this rel_likelihood devided by the sum of these values across all models
departure_model_comparison_nomean[,"akaike_weight" := rel_likelihood/departure_model_comparison_nomean.likelihoodsum]

#I want to look at relative importance FOR 
#bottom/surface (bottom = includes sbt, grepl("sbt", variable))
departure_bottom_importance <- sum(departure_model_comparison_nomean[grep("sbt", variable), ]$akaike_weight)
departure_surface_importance <- sum(departure_model_comparison_nomean[grep("sst", variable), ]$akaike_weight)

#lag/not lagged (grepl("lag", variable))
departure_lag_importance <- sum(departure_model_comparison_nomean[grep("lag", variable), ]$akaike_weight)
departure_nolag_importance <- sum(departure_model_comparison_nomean[!grep("lag", variable), ]$akaike_weight)

#absolute (grepl("abs", variable))
departure_abs_importance <- sum(departure_model_comparison_nomean[grep("abs", variable), ]$akaike_weight)

#raw (!grepl("change"))
departure_raw_importance <- sum(departure_model_comparison_nomean[!grep("change", variable), ]$akaike_weight)

#change (grepl("change", variable), (!grepl("abs")))
departure_change_importance <- sum(departure_model_comparison_nomean[grep("change", variable), ][!grep("abs", variable),]$akaike_weight)

#type of temp variable (max, min, seas)
departure_max_temp_importance <- sum(departure_model_comparison_nomean[grep("max", variable), ]$akaike_weight)
departure_min_temp_importance <- sum(departure_model_comparison_nomean[grep("min", variable), ]$akaike_weight)
departure_seas_temp_importance <- sum(departure_model_comparison_nomean[grep("seas", variable), ]$akaike_weight)

departure_model_comparison_nomean_akaikeweights <- departure_model_comparison_nomean

saveRDS(departure_model_comparison_nomean_akaikeweights, file = here("Model_results","departure_model_comparison_nomean_akaikeweights.rds"))
saveRDS(departure_model_comparison_nomean_akaikeweights, file = here("tables","departure_model_comparison_nomean_akaikeweights.rds"))
departure_model_comparison_nomean_akaikeweights <- readRDS(file = here("Model_results","departure_model_comparison_nomean_akaikeweights.rds"))

```

Relative Variable Importance for Pairs of Transformation and Metric Predicting departures
```{r RVI departure Pairs Transformation and Metric}

#I want to look at relative importance FOR 
#models with raw and min
departure_raw_min_importance <- sum(departure_model_comparison_nomean[grep("min", variable), ][!grep("change", variable)]$akaike_weight)

#models with raw and max
departure_raw_max_importance <- sum(departure_model_comparison_nomean[grep("max", variable), ][!grep("change", variable)]$akaike_weight)

#models with raw and seas
departure_raw_seas_importance <- sum(departure_model_comparison_nomean[grep("seas", variable), ][!grep("change", variable)]$akaike_weight)


#models with change and min
departure_change_min_importance <- sum(departure_model_comparison_nomean[grep("min_s.t_temp_change", variable), ][!grep("abs", variable)]$akaike_weight)

#models with change and max
departure_change_max_importance <- sum(departure_model_comparison_nomean[grep("max_s.t_temp_change", variable), ][!grep("abs", variable)]$akaike_weight)

#models with change and seas
departure_change_seas_importance <- sum(departure_model_comparison_nomean[grep("seas_s.t_temp_change", variable), ][!grep("abs", variable)]$akaike_weight)

#models with abs change and min
departure_abs_change_min_importance <- sum(departure_model_comparison_nomean[grep("min_s.t_temp_change_abs", variable), ]$akaike_weight)

#models with abs change and max
departure_abs_change_max_importance <- sum(departure_model_comparison_nomean[grep("max_s.t_temp_change_abs", variable), ]$akaike_weight)

#models with abs change and seas
departure_abs_change_seas_importance <- sum(departure_model_comparison_nomean[grep("seas_s.t_temp_change_abs", variable), ]$akaike_weight)

departure_transform_metric_comparison <- data.table(Transformation =
                                                c("raw", "raw", "raw", "change", "change", "change", "abs_change", "abs_change", "abs_change"), 
                                              Metric = 
                                                rep(c("min", "max", "seas"), 3), 
                                              RVI = 
                                                c(departure_raw_min_importance, departure_raw_max_importance, 
                                                  departure_raw_seas_importance, departure_change_min_importance, 
                                                  departure_change_max_importance, departure_change_seas_importance, 
                                                  departure_abs_change_min_importance, departure_abs_change_max_importance, 
                                                  departure_abs_change_seas_importance))

```


Let's see how the RVI importance varies by looking at specific lag values
```{r make a plot here for variability across lag values}

#first, table for lags
lags_RVI_scaled <- data.table(arrival_lag = c(0:9), arrival_RVI = 0, departure_RVI = 0)

#arrivals
lags_RVI_scaled[1,2] <- 1-sum(arrival_model_comparison_nomean[grep("lag", variable), ]$akaike_weight)
lags_RVI_scaled[2,2] <- sum(arrival_model_comparison_nomean[grep("lag1", variable), ]$akaike_weight)
lags_RVI_scaled[3,2] <- sum(arrival_model_comparison_nomean[grep("lag2", variable), ]$akaike_weight)
lags_RVI_scaled[4,2] <- sum(arrival_model_comparison_nomean[grep("lag3", variable), ]$akaike_weight)
lags_RVI_scaled[5,2] <- sum(arrival_model_comparison_nomean[grep("lag4", variable), ]$akaike_weight)
lags_RVI_scaled[6,2] <- sum(arrival_model_comparison_nomean[grep("lag5", variable), ]$akaike_weight)
lags_RVI_scaled[7,2] <- sum(arrival_model_comparison_nomean[grep("lag6", variable), ]$akaike_weight)
lags_RVI_scaled[8,2] <- sum(arrival_model_comparison_nomean[grep("lag7", variable), ]$akaike_weight)
lags_RVI_scaled[9,2] <- sum(arrival_model_comparison_nomean[grep("lag8", variable), ]$akaike_weight)
lags_RVI_scaled[10,2] <- sum(arrival_model_comparison_nomean[grep("lag9", variable), ]$akaike_weight)

#departures
lags_RVI_scaled[1,3] <- 1-sum(departure_model_comparison_nomean[grep("lag", variable), ]$akaike_weight)
lags_RVI_scaled[2,3] <- sum(departure_model_comparison_nomean[grep("lag1", variable), ]$akaike_weight)
lags_RVI_scaled[3,3] <- sum(departure_model_comparison_nomean[grep("lag2", variable), ]$akaike_weight)
lags_RVI_scaled[4,3] <- sum(departure_model_comparison_nomean[grep("lag3", variable), ]$akaike_weight)
lags_RVI_scaled[5,3] <- sum(departure_model_comparison_nomean[grep("lag4", variable), ]$akaike_weight)
lags_RVI_scaled[6,3] <- sum(departure_model_comparison_nomean[grep("lag5", variable), ]$akaike_weight)
lags_RVI_scaled[7,3] <- sum(departure_model_comparison_nomean[grep("lag6", variable), ]$akaike_weight)
lags_RVI_scaled[8,3] <- sum(departure_model_comparison_nomean[grep("lag7", variable), ]$akaike_weight)
lags_RVI_scaled[9,3] <- sum(departure_model_comparison_nomean[grep("lag8", variable), ]$akaike_weight)
lags_RVI_scaled[10,3] <- sum(departure_model_comparison_nomean[grep("lag9", variable), ]$akaike_weight)

saveRDS(lags_RVI_scaled, here("Model_results","lags_RVI_scaled.rds"))

#visualize lags through time
ggplot(data = lags_RVI_scaled, aes(x = arrival_lag)) +
  geom_line(aes(y = arrival_RVI), ) +
  geom_line(aes(y = departure_RVI), linetype = "dashed") +
  labs(x = "Lag (years)", y = "Relative Variable Importance") +
  scale_color_discrete() + #how to make a legend
  theme_classic() +
  theme(text = element_text(size = 20)) +
  scale_x_continuous(breaks = c(0:9))

```

Table for RVI data for arrival
```{r put RVIs for arrival into data table}
arrival_departure <- c("arrival","arrival","arrival","arrival","arrival", "arrival","arrival", "arrival", "arrival","arrival","arrival","arrival","arrival","arrival", "arrival","arrival", "arrival", "arrival","arrival")
type <- c("Depth", "Depth", "lag", "lag", "Transformation", "Transformation", "Transformation", "Metric", "Metric", "Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric")
variable <- c("Bottom", "Surface", "no_lag", "lagged", "|Change|", "Change", "Raw", "Min", "Max", "Seas", "Raw x Min", "Raw x Max", "Raw x Seas", "Change x Min", "Change x Max", "Change x Seas", "|Change| x Min", "|Change| x Max", "|Change| x Seas")


value <- c(arrival_bottom_importance, arrival_surface_importance, arrival_nolag_importance, arrival_lag_importance, arrival_abs_importance, arrival_change_importance, arrival_raw_importance, arrival_min_temp_importance, arrival_max_temp_importance, arrival_seas_temp_importance, arrival_raw_min_importance, arrival_raw_max_importance, arrival_raw_seas_importance, arrival_change_min_importance, arrival_change_max_importance, arrival_change_seas_importance, arrival_abs_change_min_importance, arrival_abs_change_max_importance, arrival_abs_change_seas_importance)

RVI_arrival <- data.table(arrival_departure, type, variable, value)
```

Table for RVI data for departure
```{r put RVIs for departure into data table}
arrival_departure <- c("departure","departure","departure","departure","departure", "departure","departure", "departure", "departure","departure","departure","departure","departure","departure", "departure","departure", "departure", "departure","departure")
type <- c("Depth", "Depth", "lag", "lag", "Transformation", "Transformation", "Transformation", "Metric", "Metric", "Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric", "Transformation x Metric")
variable <- c("Bottom", "Surface", "no_lag", "lagged", "|Change|", "Change", "Raw", "Min", "Max", "Seas", "Raw x Min", "Raw x Max", "Raw x Seas", "Change x Min", "Change x Max", "Change x Seas", "|Change| x Min", "|Change| x Max", "|Change| x Seas")
value <- c(departure_bottom_importance, departure_surface_importance, departure_nolag_importance, departure_lag_importance, departure_abs_importance, departure_change_importance, departure_raw_importance, departure_min_temp_importance, departure_max_temp_importance, departure_seas_temp_importance, departure_raw_min_importance, departure_raw_max_importance, departure_raw_seas_importance, departure_change_min_importance, departure_change_max_importance, departure_change_seas_importance, departure_abs_change_min_importance, departure_abs_change_max_importance, departure_abs_change_seas_importance)

RVI_departure <- data.table(arrival_departure, type, variable, value)
```

Merge RVI tables for arrival and departure, and then graph
```{r merge RVI tables and plot}
RVI_table <- as.data.table(rbind(RVI_arrival, RVI_departure)) #bind RVI for arrivals and departures

RVI_table.r <- RVI_table[!grep("Surface", variable)][!grep("lagged", variable)]

#change order of factor levels
RVI_table.r[, variable := factor(variable, levels = c("Bottom", "no_lag", "Raw", "Change", "|Change|", "Min", "Max", "Seas"))]

saveRDS(RVI_table, file = here("Model_results", "RVI_table.rds"))

ggplot(data = RVI_table.r, aes(x=variable, y = value, fill = arrival_departure)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_x_discrete(breaks = c("Bottom", "no_lag", "Raw", "Change", "|Change|", "Min", "Max", "Seas"), 
                   labels = c("Bottom Temp", "No Lag", "Raw Temp", "Change in \nTemp", "| Change in\n Temp |", "Min Temp", "Max Temp", "Seas")) +
  labs(x = "Temperature Variables", y = "Relative Variable Importance")  +
  theme_bw()

```

Another way to visualize is stacked bar plot.

```{r RVI stacked plot}
#put levels into correct order for viewing, and add dummy level for legend
RVI_arrival[, variable := factor(variable, levels = c("Bottom", "Surface","  ", "Raw", "Change", "|Change|","", "no_lag", "lagged", "Min", "Max", "Seas"))]
RVI_arrival[, type := factor(type, levels = c("Depth", "Transformation", "Metric", "lag"))]

RVI_arrival_nolag <- RVI_arrival[type != "lag",]
RVI_arrival_nolag[, type := factor(type, levels = c("Depth", "Transformation", "Metric"))][, variable := factor(variable, levels = c("Bottom", "Surface","  ", "Raw", "Change", "|Change|","", "Min", "Max", "Seas"))]

saveRDS(RVI_arrival_nolag, here("Model_results", "RVI_arrival_nolag.rds"))

#and again for departure
RVI_departure[, variable := factor(variable, levels = c("Bottom", "Surface","  ", "Raw", "Change", "|Change|","", "no_lag", "lagged", "Min", "Max", "Seas"))]
RVI_departure[, type := factor(type, levels = c("Depth", "Transformation", "Metric", "lag"))]

RVI_departure_nolag <- RVI_departure[type != "lag",]
RVI_departure_nolag[, type := factor(type, levels = c("Depth", "Transformation", "Metric"))][, variable := factor(variable, levels = c("Bottom", "Surface","  ", "Raw", "Change", "|Change|","", "Min", "Max", "Seas"))]

saveRDS(RVI_departure_nolag, here("Model_results", "RVI_departure_nolag.rds"))


#plotting
pal <- wes_palette("GrandBudapest1", 8, type = "continuous")
colors <- c(pal[1], pal[2], "#FFFFFF", pal[3], pal[4], pal[5],"#FFFFFF", pal[6], pal[7], pal[8])

#arrivals
RVI_arrival_plot <- ggplot(data = RVI_arrival_nolag, aes(x=type, y = value, fill = variable)) +
  geom_bar(stat="identity", color = "black", size = 0, width = 0.8) +
  scale_x_discrete(breaks = c("Depth", "Transformation", "Metric"), labels = c("Depth", "Transformation", "Metric")) +
  labs(x = "Temperature Variables", y = "Relative Variable Importance") +
  theme_classic() +
  theme(text = element_text(size = 11.5), 
        legend.position = "right", 
        legend.title = element_blank(), 
        legend.spacing.x = unit(0.2, 'cm'),
        legend.justification = "center",
        aspect.ratio = (1),
        #, legend.position = "none"
        ) +
  guides(fill=guide_legend(ncol=1)) +
  scale_fill_manual(values = colors,
                    drop = F)

#departure
RVI_departure_plot <- ggplot(data = RVI_departure_nolag, aes(x=type, y = value, fill = variable)) +
  geom_bar(stat="identity", color = "black", size = 0, width = 0.8) +
  scale_x_discrete(breaks = c("Depth", "Transformation", "Metric"), labels = c("Depth", "Transformation", "Metric")) +
  labs(x = "Temperature Variables", y = "Relative Variable Importance") +
  theme_classic() +
  theme(text = element_text(size = 11.5), 
        legend.position = "right", 
        legend.title = element_blank(), 
        legend.spacing.x = unit(0.2, 'cm'),
        legend.justification = "center",
        aspect.ratio = (1),
        #, legend.position = "none"
        ) +
  guides(fill=guide_legend(ncol=1)) +
  scale_fill_manual(values = colors,
                    drop = F)

RVI_table
RVI_arrival_plot
RVI_departure_plot
```
Next step is to add the coefficients on to the bar chart. To do this, I will need to sum (aikake weight of model including variable x coefficient). I think this will be easy enough Using arrival_model_comparison_nomean data frame. I will add new column to data frame of aikaike weight x coefficient

```{r avg coefficients for arrival models}
#for some reason the coefficient is a factor
arrival_model_comparison_nomean[,coef_num := as.numeric(as.character(coef))]
arrival_model_comparison_nomean[,aw_coef := akaike_weight * coef_num]

#bottom temperature avg coefficient
arrival_bottom_temp_avg_coef <- sum(arrival_model_comparison_nomean[grep("sbt", variable)]$coef_num)

#surface temperature avg coefficient
arrival_surface_temp_avg_coef <- sum(arrival_model_comparison_nomean[grep("sst", variable)]$coef_num)

#raw seasonality avg coefficient
arrival_seas_raw_avg_coef <- 
  sum(arrival_model_comparison_nomean[grep("seas", variable), ][!grep("change", variable)]$coef_num)

#max abs change avg coefficient
arrival_max_abs_change_avg_coef <- 
  sum(arrival_model_comparison_nomean[grep("max_s.t_temp_change_abs", variable), ]$coef_num)


```

```{r avg coefficients for departure models}
#for some reason the coefficient is a factor
departure_model_comparison_nomean[,coef_num := as.numeric(as.character(coef))]
departure_model_comparison_nomean[,aw_coef := akaike_weight * coef_num]

#bottom temperature avg coefficient
departure_bottom_temp_avg_coef <- sum(departure_model_comparison_nomean[grep("sbt", variable)]$coef_num)

#surface temperature avg coefficient
departure_surface_temp_avg_coef <- sum(departure_model_comparison_nomean[grep("sst", variable)]$coef_num)

#raw seasonality avg coefficient
departure_seas_raw_avg_coef <- 
  sum(departure_model_comparison_nomean[grep("seas", variable), ][!grep("change", variable)]$coef_num)

#min change avg coefficient
departure_min_abs_change_avg_coef <- 
  sum(departure_model_comparison_nomean[grep("min_s.t_temp_change", variable), ][!grep("abs", variable)]$coef_num)

#seas change avg coefficient
departure_seas_change_avg_coef <- 
  sum(departure_model_comparison_nomean[grep("seas_s.t_temp_change", variable), ][!grep("abs", variable)]$coef_num)


```


*************************
*************************
*************************
*************************
(These analyses were irrelevant and therefore not repeated for revisions)
##What if we run these models with fish only? No traits? How do inverts change the story? Because it looks like above, not a ton of invertebrates are going in and out. 

```{r fish only}
#list of fish names from trait database
traits <- fread("TraitCollectionFishNAtlanticNEPacificContShelf (3).csv")

#make fish no fish key
fish <- data.table(spp = unique(traits$taxon), fish = "Y")

#combine with spp master
spp_master_ztemp_seus_buoy_scaled_fishonly <- merge(spp_master_ztemp_seus_buoy_scaled, fish, all.x = T)

#get rid of any rows that aren't observations of fish
spp_master_ztemp_seus_buoy_scaled_fishonly2 <- spp_master_ztemp_seus_buoy_scaled_fishonly[fish == "Y",]

```

Running models for fish only

```{r arrival models without traits for fish only}

#names of scaled columns
scaled_cols <- grep("*_scaled", names(spp_master_ztemp_seus_buoy_scaled_fishonly2), value = T)

#going to make loop to make all models I need to look at with single variables
variables_scaled <- colnames(spp_master_ztemp_seus_buoy_scaled_fishonly2[,scaled_cols, with = FALSE])

arrival_model_comparison_spprandom_scaled_seus_update_fishonly <- as.data.table(matrix(nrow = length(variables_scaled)))
arrival_model_comparison_spprandom_scaled_seus_update_fishonly[, variable:=as.factor(V1)][, coef:=as.numeric(V1)][, p_value:=as.numeric(V1)][, AICc:=as.numeric(V1)]
arrival_model_comparison_spprandom_scaled_seus_update_fishonly[, V1 := NULL]

for (i in 1:length(variables_scaled)){
  mod <- glmer(col ~ get(variables_scaled[i]) + (1|reg) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled_fishonly2, nAGQ = 0)
  arrival_model_comparison_spprandom_scaled_seus_update_fishonly[i,variable := variables_scaled[i]]
  arrival_model_comparison_spprandom_scaled_seus_update_fishonly[i,coef := coef(summary(mod))[,"Estimate"][2]]
  arrival_model_comparison_spprandom_scaled_seus_update_fishonly[i,p_value := coef(summary(mod))[,"Pr(>|z|)"][2]]
  arrival_model_comparison_spprandom_scaled_seus_update_fishonly[i,AICc := AICc(mod)]
  
  print(paste(i, length(variables_scaled), sep = "/"))
    
}

#by setting nACQ to 0, all models converged


```

departure Models without Traits
```{r departure models without traits for fish only}
#see setup above

departure_model_comparison_spprandom_scaled_seus_update_fishonly <- as.data.table(matrix(nrow = length(variables_scaled)))
departure_model_comparison_spprandom_scaled_seus_update_fishonly[, variable:=as.factor(V1)][, coef:=as.numeric(V1)][, p_value:=as.numeric(V1)][, AICc:=as.numeric(V1)]
departure_model_comparison_spprandom_scaled_seus_update_fishonly[, V1 := NULL]

for (i in 1:length(variables_scaled)){
  mod <- glmer(now_ext ~ get(variables_scaled[i]) + (1|reg) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled_fishonly2, nAGQ = 0)
  departure_model_comparison_spprandom_scaled_seus_update_fishonly[i,variable := variables_scaled[i]]
  departure_model_comparison_spprandom_scaled_seus_update_fishonly[i,coef := coef(summary(mod))[,"Estimate"][2]]
  departure_model_comparison_spprandom_scaled_seus_update_fishonly[i,p_value := coef(summary(mod))[,"Pr(>|z|)"][2]]
  departure_model_comparison_spprandom_scaled_seus_update_fishonly[i,AICc := AICc(mod)]
  
  print(paste(i, length(variables_scaled), sep = "/"))
    
}

#by setting nACQ to 0, all models converged


```

Relative Variable Importance
```{r get rid of any models using mean for fish only}
arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean <- arrival_model_comparison_spprandom_scaled_seus_update_fishonly[!grepl("mean",variable),]

departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean <- departure_model_comparison_spprandom_scaled_seus_update_fishonly[!grepl("mean",variable),]

#comparison to null
col_mod_null <- glmer(col ~ (1|reg) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled_fishonly2, nAGQ = 0)
AICc(col_mod_null)-min(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean$AICc)

ext_mod_null <- glmer(ext ~ (1|reg) + (1|spp), family = binomial, data = spp_master_ztemp_seus_buoy_scaled_fishonly2, nAGQ = 0)
AICc(ext_mod_null)-min(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean$AICc)
```

Relative Variable Importance for arrival Models
```{r RVI arrival for fish only}
#add ∆AIC to tables
min_col_AICc_fishonly <- min(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[, AICc])
arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[,"deltaAICc" := (AICc - min_col_AICc_fishonly)]

#add relative likelihood exp( -0.5 * ∆AIC score for that model)
arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[,"rel_likelihood" := exp((-0.5 * deltaAICc))]
#sum relative likelihoods across all models
arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean.likelihoodsum <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[, rel_likelihood])
#Akaike weight for a model is this rel_likelihood devided by the sum of these values across all models
arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[,"akaike_weight" := rel_likelihood/arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean.likelihoodsum]

#I want to look at relative importance FOR 
#bottom/surface (bottom = includes sbt, grepl("sbt", variable))
col_bottom_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("sbt", variable), ]$akaike_weight)
col_surface_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("sst", variable), ]$akaike_weight)

#lag/not lagged (grepl("lag", variable))
col_lag_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag", variable), ]$akaike_weight)
col_nolag_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[!grep("lag", variable), ]$akaike_weight)

#absolute (grepl("abs", variable))
col_abs_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("abs", variable), ]$akaike_weight)

#raw (!grepl("change"))
col_raw_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[!grep("change", variable), ]$akaike_weight)

#change (grepl("change", variable), (!grepl("abs")))
col_change_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("change", variable), ][!grep("abs", variable),]$akaike_weight)

#type of temp variable (max, min, seas)
col_max_temp_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("max", variable), ]$akaike_weight)
col_min_temp_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("min", variable), ]$akaike_weight)
col_seas_temp_importance_fishonly <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("seas", variable), ]$akaike_weight)
```

Relative Variable Importance for departure Models
```{r RVI departure for fish only}
#add ∆AIC to tables
min_ext_AICc_fishonly <- min(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[, AICc])
departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[,"deltaAICc" := (AICc - min_ext_AICc_fishonly)]

#add relative likelihood exp( -0.5 * ∆AIC score for that model)
departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[,"rel_likelihood" := exp((-0.5 * deltaAICc))]
#sum relative likelihoods across all models
departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean.likelihoodsum <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[, rel_likelihood])
#Akaike weight for a model is this rel_likelihood devided by the sum of these values across all models
departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[,"akaike_weight" := rel_likelihood/departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean.likelihoodsum]

#I want to look at relative importance FOR 
#bottom/surface (bottom = includes sbt, grepl("sbt", variable))
ext_bottom_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("sbt", variable), ]$akaike_weight)
ext_surface_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("sst", variable), ]$akaike_weight)

#lag/not lagged (grepl("lag", variable))
ext_lag_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag", variable), ]$akaike_weight)
ext_nolag_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[!grep("lag", variable), ]$akaike_weight)

#absolute (grepl("abs", variable))
ext_abs_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("abs", variable), ]$akaike_weight)

#raw (!grepl("change"))
ext_raw_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[!grep("change", variable), ]$akaike_weight)

#change (grepl("change", variable), (!grepl("abs")))
ext_change_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("change", variable), ][!grep("abs", variable),]$akaike_weight)

#type of temp variable (max, min, seas)
ext_max_temp_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("max", variable), ]$akaike_weight)
ext_min_temp_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("min", variable), ]$akaike_weight)
ext_seas_temp_importance_fishonly <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("seas", variable), ]$akaike_weight)
```

Let's see how the RVI importance varies by looking at specific lag values
```{r make a plot here for variability across lag values for fish only}
#first, table for lags
lags_v_RVI_spprandom_scaled_fishonly <- data.table(col_lag = c(0:9), col_RVI = 0, departure_RVI = 0)
#arrival
lags_v_RVI_spprandom_scaled_fishonly[1,2] <- 1-sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[2,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag1", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[3,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag2", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[4,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag3", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[5,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag4", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[6,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag5", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[7,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag6", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[8,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag7", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[9,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag8", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[10,2] <- sum(arrival_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag9", variable), ]$akaike_weight)
#departure
lags_v_RVI_spprandom_scaled_fishonly[1,3] <- 1-sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[2,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag1", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[3,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag2", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[4,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag3", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[5,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag4", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[6,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag5", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[7,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag6", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[8,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag7", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[9,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag8", variable), ]$akaike_weight)
lags_v_RVI_spprandom_scaled_fishonly[10,3] <- sum(departure_model_comparison_spprandom_scaled_seus_update_fishonly_nomean[grep("lag9", variable), ]$akaike_weight)

ggplot(data = lags_v_RVI_spprandom_scaled_fishonly, aes(x = col_lag)) +
  geom_line(aes(y = col_RVI), col = "blue") +
  geom_line(aes(y = departure_RVI), col = "red") +
  labs(x = "Lag", y = "Relative Variable Importance") +
  scale_color_discrete() + #how to make a legend
  theme_bw()
```

Table for RVI data for arrival
```{r put RVIs for arrival into data table for fish only}
col_ext <- c("col","col","col","col","col", "col","col", "col", "col","col")
type <- c("depth", "depth", "lag", "lag", "Change?", "Change?", "Change?", "Temp", "Temp", "Temp")
variable <- c("bottom", "surface", "no_lag", "lagged", "absolute_value_change", "change", "raw", "Min", "Max", "Seas")
value <- c(col_bottom_importance_fishonly, col_surface_importance_fishonly, col_nolag_importance_fishonly, col_lag_importance_fishonly, col_abs_importance_fishonly, col_change_importance_fishonly, col_raw_importance_fishonly, col_min_temp_importance_fishonly, col_max_temp_importance_fishonly, col_seas_temp_importance_fishonly)

RVI_col_fishonly <- data.table(col_ext, type, variable, value)
```

Table for RVI data for departure
```{r put RVIs for departure into data table for fish only}
col_ext <- c("ext","ext","ext","ext","ext", "ext", "ext", "ext", "ext", "ext")
type <- c("depth", "depth", "lag", "lag", "Change?", "Change?", "Change?", "Temp", "Temp", "Temp")
variable <- c("bottom", "surface", "no_lag", "lagged", "absolute_value_change", "change", "raw", "Min", "Max", "Seas")
value <- c(ext_bottom_importance_fishonly, ext_surface_importance_fishonly, ext_nolag_importance_fishonly, ext_lag_importance_fishonly, ext_abs_importance_fishonly, ext_change_importance_fishonly, ext_raw_importance_fishonly, ext_min_temp_importance_fishonly, ext_max_temp_importance_fishonly, ext_seas_temp_importance_fishonly)

RVI_ext_fishonly <- data.table(col_ext, type, variable, value)
```

Merge RVI tables for arrival and departure, and then graph
```{r merge RVI tables and plot for fish only}
RVI_table_fishonly <- as.data.table(rbind(RVI_col_fishonly, RVI_ext_fishonly)) #bind RVI for arrivals and departures

RVI_table.r_fishonly <- RVI_table_fishonly[!grep("surface", variable)][!grep("lagged", variable)]

#change order of factor levels
RVI_table.r_fishonly[, variable := factor(variable, levels = c("bottom", "no_lag", "raw", "change", "absolute_value_change", "Min", "Max", "Seas"))]

save(RVI_table_fishonly, file = "RVI_table_fishonly.Rdata")

ggplot(data = RVI_table.r_fishonly, aes(x=variable, y = value, fill = col_ext)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_x_discrete(breaks = c("bottom", "no_lag", "raw", "change", "absolute_value_change", "Min", "Max", "Seas"), labels = c("Bottom Temp", "No Lag", "Raw Temp", "Change in \nTemp", "| Change in\n Temp |", "Min Temp", "Max Temp", "Seas")) +
  labs(x = "Temperature Variables", y = "Relative Variable Importance")  +
  theme_bw()

```

Another way to visualize is stacked bar plot.

```{r RVI stacked plot for fish only}
#put levels into correct order for viewing
RVI_table_fishonly[, variable := factor(variable, levels = c("bottom", "surface", "absolute_value_change", "change", "raw", "no_lag", "lagged", "Min", "Max", "Seas"))]
#make types factors as well
RVI_table_fishonly[,type := as.factor(type)]

#arrival
RVI_col_plot_fishonly <- ggplot(data = RVI_table_fishonly[col_ext == "col",], aes(x=type, y = value, fill = variable)) +
  geom_bar(stat="identity", color = "black") +
  scale_x_discrete(breaks = c("Change?", "depth", "lag", "Temp"), labels = c("Change in\ntemp?", "Metric\nDepth", "Lagged?", "Temp")) +
  labs(x = "Temperature Variables", y = "Relative Variable Importance") +
  theme_classic() +
  theme(text = element_text(size = 22)
        #, legend.position = "none"
        )

#departure
RVI_ext_plot_fishonly <- ggplot(data = RVI_table_fishonly[col_ext == "ext",], aes(x=type, y = value, fill = variable)) +
  geom_bar(stat="identity", color = "black") +
  scale_x_discrete(breaks = c("Change?", "depth", "lag", "Temp"), labels = c("Change in\ntemp?", "Metric\nDepth", "Lagged?", "Temp")) +
  labs(x = "Temperature Variables", y = "Relative Variable Importance") +
  theme_classic()  +
  theme(text = element_text(size = 22)
        #, legend.position = "none"
        )

RVI_table_fishonly
RVI_col_plot_fishonly
RVI_ext_plot_fishonly


```
